Add a `user_agent` option to WebsiteAgent.

Akinori MUSHA 11 years ago
parent
commit
e8751af629
2 changed files with 41 additions and 0 deletions
  1. 10 0
      app/models/agents/website_agent.rb
  2. 31 0
      spec/models/agents/website_agent_spec.rb

+ 10 - 0
app/models/agents/website_agent.rb

@@ -47,6 +47,8 @@ module Agents
47 47
 
48 48
       Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset.
49 49
 
50
+      Set `user_agent` to a custom User-Agent name if the website does not like the default value ("Faraday v#{Faraday::VERSION}").
51
+
50 52
       The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload.
51 53
     MD
52 54
 
@@ -105,6 +107,10 @@ module Agents
105 107
         end
106 108
       end
107 109
 
110
+      if options['user_agent'].present?
111
+        errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
112
+      end
113
+
108 114
       begin
109 115
         basic_auth_credentials()
110 116
       rescue => e
@@ -281,6 +287,10 @@ module Agents
281 287
 
282 288
     def faraday
283 289
       @faraday ||= Faraday.new { |builder|
290
+        if (user_agent = options['user_agent']).present?
291
+          builder.headers[:user_agent] = user_agent
292
+        end
293
+
284 294
         builder.use FaradayMiddleware::FollowRedirects
285 295
         builder.request :url_encoded
286 296
         if userinfo = basic_auth_credentials()

+ 31 - 0
spec/models/agents/website_agent_spec.rb

@@ -376,4 +376,35 @@ describe Agents::WebsiteAgent do
376 376
       end
377 377
     end
378 378
   end
379
+
380
+  describe "checking with User-Agent" do
381
+    before do
382
+      stub_request(:any, /example/).
383
+        with(headers: { 'User-Agent' => 'Sushi' }).
384
+        to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
385
+      @site = {
386
+        'name' => "XKCD",
387
+        'expected_update_period_in_days' => 2,
388
+        'type' => "html",
389
+        'url' => "http://www.example.com",
390
+        'mode' => 'on_change',
391
+        'extract' => {
392
+          'url' => { 'css' => "#comic img", 'attr' => "src" },
393
+          'title' => { 'css' => "#comic img", 'attr' => "alt" },
394
+          'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
395
+        },
396
+        'user_agent' => "Sushi"
397
+      }
398
+      @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site)
399
+      @checker.user = users(:bob)
400
+      @checker.save!
401
+    end
402
+
403
+    describe "#check" do
404
+      it "should check for changes" do
405
+        lambda { @checker.check }.should change { Event.count }.by(1)
406
+        lambda { @checker.check }.should_not change { Event.count }
407
+      end
408
+    end
409
+  end
379 410
 end